Registered S3 method overwritten by 'tsibble':
method from
as_tibble.grouped_df dplyr
Attaching package: 'tsibble'
The following object is masked from 'package:lubridate':
interval
The following objects are masked from 'package:base':
intersect, setdiff, union
# Example: Cache la Poudre River at Mouth (USGS site 06752260)poudre_flow <-readNWISdv(siteNumber ="06752260", # Download data from USGS for site 06752260parameterCd ="00060", # Parameter code 00060 = discharge in cfs)startDate ="2013-01-01", # Set the start dateendDate ="2023-12-31") |># Set the end daterenameNWISColumns() |># Rename columns to standard names (e.g., "Flow", "Date")mutate(Date =yearmonth(Date)) |># Convert daily Date values into a year-month format (e.g., "2023 Jan")group_by(Date) |># Group the data by the new monthly Datesummarise(Flow =mean(Flow))
# Convert to tsibble (monthly data, with Date as index)poudre_ts <- poudre_flow |>as_tsibble(index = Date)
2. Plotting the time series
library(ggplot2)library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Basic ggplot of flow over timep <-ggplot(poudre_ts, aes(x = Date, y = Flow)) +geom_line(color ="blue") +labs(title ="Monthly Average Streamflow - Cache la Poudre River",y ="Flow (cfs)", x ="Date") +theme_minimal()# Animate with plotlyggplotly(p)
3. Subseries
library(feasts)
Loading required package: fabletools
Attaching package: 'fabletools'
The following object is masked from 'package:yardstick':
accuracy
The following object is masked from 'package:parsnip':
null_model
The following objects are masked from 'package:infer':
generate, hypothesize
# Subseries plot to show seasonal patternspoudre_ts |>gg_subseries(Flow)
4. Decompose
library(fabletools)# STL decomposition with a seasonal window (say 13 months for annual seasonality)decomp <- poudre_ts |>model(STL(Flow ~season(window ="periodic"))) |>components()# Plot the componentsautoplot(decomp)
Daily Exercise 22
1. Use modeltime to forecast the next 12 months of streamflow data in the Poudre River based on last time assignment.
library(modeltime)library(timetk)library(tidymodels)library(lubridate)# Make sure Date is a date objectpoudre_ts <- poudre_ts |>mutate(Date =as.Date(Date)) # Split data into training (all except last year)splits <-initial_time_split(poudre_ts, prop =0.92) # Approx. up to end of 2022# Check splittraining(splits) %>%tail()
# A tsibble: 6 x 2 [1D]
Date Flow
<date> <dbl>
1 2022-08-01 59.9
2 2022-09-01 18.5
3 2022-10-01 69.1
4 2022-11-01 11.2
5 2022-12-01 10.4
6 2023-01-01 7.29
testing(splits) %>%head()
# A tsibble: 6 x 2 [1D]
Date Flow
<date> <dbl>
1 2023-02-01 21.9
2 2023-03-01 28.8
3 2023-04-01 76.8
4 2023-05-01 648.
5 2023-06-01 1499.
6 2023-07-01 188.
2. Use the prophet_reg(), and arima_reg() function to create a Prophet model for forecasting.
library(parsnip)library(lubridate)library(dplyr)library(tidymodels)# Prophet modelprophet_model <-prophet_reg() %>%set_engine("prophet")# ARIMA modelarima_model <-arima_reg() %>%set_engine("auto_arima")# Fit modelsmodel_tbl <-modeltime_table(fit(prophet_model, Flow ~ Date, data =training(splits)),fit(arima_model, Flow ~ Date, data =training(splits)))
Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
frequency = 12 observations per 1 year
3. Use dataRetrieval to download daily streamflow for the next 12 months. Aggregate this data to monthly averages and compare it to the predictions made by your modeltime model.
Error: Column `Date` (index) must not contain `NA`.
Warning: Unknown or uninitialised column: `.key`.
# Aggregate forecasts to monthly meansforecast_monthly <- forecast_tbl |>filter(.key =="prediction") |>mutate(month =floor_date(.index, "month")) |>group_by(month) |>summarise(predicted_cfs =mean(.value, na.rm =TRUE), .groups ="drop") |>rename(Date = month)# RJoin observed and predicted data comparison_tbl <-left_join(forecast_monthly, poudre_2024, by ="Date") |>filter(!is.na(observed_cfs), !is.na(predicted_cfs))
4. Compute the R2 value between the model predictions and the observed data using a linear model and report the meaning.
# Compute R2 using linear modelif(nrow(comparison_tbl) >1) { r2_model <-lm(observed_cfs ~ predicted_cfs, data = comparison_tbl) r2_value <-summary(r2_model)$r.squaredcat("R2 between predictions and observed values:", round(r2_value, 3), "\n")} else {cat("Not enough overlapping data to compute R2 - check monthly alignment.\n")}
R2 between predictions and observed values: 0.919
With this we can see that the R^2 value between predictions and observed values is 0.919. This indicates a strong positive correlation between actual and predicted values and that our model is reliable given the domain of this data.
5. Last, generate a plot of the Predicted vs Observed values and include a 1:1 line, and a linear model line.
library(ggplot2)# Plot with 1:1 line and regression lineggplot(comparison_tbl, aes(x = observed_cfs, y = predicted_cfs)) +geom_point(size =2, color ="steelblue") +geom_abline(intercept =0, slope =1, linetype ="dashed", color ="gray") +geom_smooth(method ="lm", se =FALSE, color ="darkred") +labs(title ="Predicted vs Observed Streamflow (2024)",x ="Observed Flow (cfs)", y ="Predicted Flow (cfs)") +theme_minimal()